import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Debug aid: show the name this module was loaded under ('__main__' when
# run as a script, the module's own name when imported). Because it sits at
# module level, it runs on every import as well as on direct execution.
print(__name__)

def get_content(target, timeout=10):
    """Download one chapter page and return its text split into pieces.

    Args:
        target: URL of the chapter page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of strings: the stripped text of the page's
        ``<div id="content">`` element, split on runs of four non-breaking
        spaces (presumably the paragraph indent used by the site).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``<div id="content">`` element.
    """
    # requests quickstart:
    # https://docs.python-requests.org/zh_CN/latest/user/quickstart.html
    # Without a timeout a stalled server would block the script forever.
    req = requests.get(url=target, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    req.raise_for_status()
    # NOTE: the encoding is intentionally left as detected by requests; per
    # the original author's note, forcing req.encoding = 'utf-8' garbles the
    # Chinese text returned by this site.
    html = req.text
    # BeautifulSoup manual:
    # https://beautifulsoup.readthedocs.io/zh_CN/latest/
    bs = BeautifulSoup(html, 'lxml')
    texts = bs.find('div', id='content')
    if texts is None:
        # find() returns None when nothing matches; report that explicitly
        # rather than letting ``None.text`` raise an opaque AttributeError.
        raise ValueError(
            'no <div id="content"> element found at {}'.format(target)
        )
    content = texts.text.strip().split('\xa0' * 4)
    return content

if __name__ == '__main__':
    # Script entry point: fetch the book's chapter index, download the
    # first few chapters and append them to a local text file.

    # Imported here because only the script entry point needs it.
    from urllib.parse import urljoin

    server = 'http://www.ltoooo.com'
    target = server + '/0_71/'
    book_name = '诡秘之主.txt'

    # A timeout keeps a stalled server from hanging the script, and
    # raise_for_status() stops us from parsing an HTTP error page.
    req = requests.get(url=target, timeout=10)
    req.raise_for_status()
    bs = BeautifulSoup(req.text, 'lxml')

    # BeautifulSoup spells the keyword ``class_`` (with a trailing
    # underscore) because ``class`` is a reserved word in Python.
    listing = bs.find('div', class_='listmain')
    if listing is None:
        raise SystemExit(
            'no <div class="listmain"> element found at {}'.format(target)
        )

    # href=True skips anchors that carry no link at all; only the first
    # three chapter links are downloaded.
    chapters = listing.find_all('a', href=True)[:3]

    for chapter in tqdm(chapters):
        # urljoin resolves root-relative, relative and absolute hrefs alike,
        # whereas plain string concatenation only works for '/path' hrefs.
        url = urljoin(target, chapter['href'])
        # get_text() always returns a str, while .string is None when the
        # anchor wraps nested tags.
        chapter_name = chapter.get_text()
        content = get_content(url)
        # Append mode: each chapter is added after whatever the file
        # already holds, so re-running the script appends again.
        with open(book_name, 'a', encoding='utf-8') as f:
            # str.join() concatenates the items with the separator between
            # them, e.g. '-'.join(['a', 'b', 'c']) == 'a-b-c'; here it puts
            # each piece of the chapter text on its own line.
            f.write(chapter_name + '\n' + '\n'.join(content) + '\n')
        
        
        
        
        
        
        
        
        
        
        